* SOURCES
* The 1965, 1973 and 1987 surveys can be obtained from the 
* Australian Social Science Data Archive <http://assda.anu.edu.au/>
* The 2004 survey is the unconfidentialised version of the HILDA
* survey, and can be obtained via the Melbourne Institute <http://melbourneinstitute.com/hilda/>

version 10.0

***********************************************
* 1965 Intergenerational earnings correlations
* This dataset is unweighted
* Note that 1/3rd of those in this dataset are women.
* We can't run a specification with unemployment, since all respondents are employed.
* 89 unique values for fathers' occupations, 94 for sons
*********************************************** 
* 1965 survey: estimates the elasticity of a son's log hourly wage with respect
* to an imputed log hourly wage for his father. The father's wage is the wage
* predicted for a 40-year-old in the father's occupation, using a regression
* fitted on the respondents themselves.
clear
set more off
cd <DIRECTORY>
use socialmobility1965.dta, clear
* Keep respondents with SEX==0 in AGE bands 2-7 (men aged 25-54 per the original note).
* NOTE(review): the original note also said "in FT employment", but no
* employment filter is applied here; FULLTIME only selects the hours divisor
* used when hwage is built below.
keep if AGE>=2 & AGE<=7 & SEX==0 
* Income coded to midpoint of band. Top band coded to 115% of top limit (2500 pounds)
* Code 6 is set to missing.
recode INCOME 0=450 1=1100 2=1500 3=1900 4=2300 5=2875 6=.
* Age bands are replaced by a single representative age per band.
recode AGE 2=27 3=32 4=37 5=42 6=47 7=52
gen age=AGE
gen age2=age^2
* Log hourly wage: income divided by 40*48 when FULLTIME==0 and by 20*48 when
* FULLTIME==1 (presumably weekly hours x weeks worked per year - TODO confirm).
gen hwage=ln(INCOME/(40*48)) if FULLTIME==0
replace hwage=ln(INCOME/(20*48)) if FULLTIME==1
* First stage: wage on age, age squared and occupation dummies.
xi: reg hwage age age2 i.OCCUPP
* Overwrite age with 40 so that predict returns the occupation-specific wage at
* age 40. The true age is restored after the lookup loop below.
replace age=40 
replace age2=1600
predict hwage_dad if e(sample)
* Move the predictions into t_hwage_dad and blank hwage_dad, which is refilled
* below according to the FATHER's occupation.
for any hwage_dad: gen t_X=X \ replace X=.
levelsof OCCUPP, local(occ)
foreach o of local occ {
	* Predicted age-40 wage for respondents in the current occupation ...
	gen temp1=t_hwage_dad if OCCUPP==`o'
	* ... spread to every row (the prediction is constant within an occupation
	* because age is fixed at 40) ...
	egen temp2=max(temp1)
	* ... and assigned to respondents whose father held that occupation.
	replace hwage_dad=temp2 if FATHOCC==`o'
	drop temp1 temp2
	}
drop t_*
* Restore the true (band midpoint) age for the second-stage regressions.
replace age=AGE
replace age2=age^2

* Elasticity - full sample
reg hwage hwage_dad age age2,r
* replace starts a fresh results.doc; every later table appends to it.
outreg using results.doc, se nocons bracket noaster replace bdec(3) ct("IGE 1965")

* Elasticity for Australian-born fathers and sons
reg hwage hwage_dad age age2 if MBRTHPLC==0 & MFATHBPL==0,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 1965 - Native")

* Summary stats
* The full-sample regression is re-run so that e(sample) below refers to it
* rather than to the native-born regression.
reg hwage hwage_dad age age2,r
* Convert logs back to levels and rescale by .104/2.
* NOTE(review): presumably a price deflator and a pounds-to-dollars conversion - confirm.
for any hwage hwage_dad: replace X=exp(X)/(.104/2)
* replace starts a fresh sumstats.smcl; later sections append to it.
log using sumstats.smcl, replace
di "1965"
sum hwage hwage_dad if e(sample)
log close

***********************************************
* 1973 Intergenerational earnings correlations
* There are two separate datasets for men and women.
* Notes on variables not used: 
* Dad born in Australia if V21==1/9. (Or if you want to be more precise, year father came to Aust is V23)
* 218 unique values for fathers' occupations, 269 for sons
*********************************************** 
* 1973 survey (men's file): weighted father-son wage elasticities, plus a
* "broad-banded" variant in which own income is coarsened to six bands before
* wages are computed.
set more off
cd  <DIRECTORY>
use socialmobility1973m.dta, clear
ren V301 OCCUPP 
ren V24 FATHOCC
* Occupation codes 0 and 999 are treated as missing.
for any OCCUPP FATHOCC: recode X 0=. 999=.
ren V523 weight

* Keep men aged 25-54 (note that the entire dataset is men, so no need to restrict there)
* Age is 1973 minus V77 (so V77 is used as year of birth).
gen age=1973-V77
keep if age>=25 & age<=54
* Income coded to midpoint of band. Top band coded to 115% of top limit (top limit=$240)
* Codes 0, 17 and 99 are set to missing.
ren V390 INCOME
recode INCOME 0=. 1=10 2=25 3=35 4=45 5=55 6=65 7=75 8=85 9=95 10=105 11=115 12=130 13=150 14=180 15=220 16=276 17=. 99=.
* Dropping those who are not working now
keep if V278==1 
gen age2=age^2
* Log of income per unit of V305 (presumably hours worked - TODO confirm).
gen hwage=ln(INCOME/V305) 
* First stage: weighted regression of wage on age, age squared and occupation dummies.
xi: reg hwage age age2 i.OCCUPP [aw=weight]
* Fix age at 40 so predict gives the occupation-specific wage at age 40.
replace age=40 
replace age2=1600
predict hwage_dad if e(sample)
* Stash predictions in t_hwage_dad; hwage_dad is refilled by FATHER's occupation.
for any hwage_dad: gen t_X=X \ replace X=.
levelsof OCCUPP, local(occ)
foreach o of local occ {
	* Age-40 predicted wage for the current occupation, spread to all rows and
	* assigned to respondents whose father held that occupation.
	gen temp1=t_hwage_dad if OCCUPP==`o'
	egen temp2=max(temp1)
	replace hwage_dad=temp2 if FATHOCC==`o'
	drop temp1 temp2
	}
drop t_*

* Creating 'broadbanded' own income and father's income (for robustness check)
* Own income is replaced by the mean income of its weighted sextile.
xtile temp=INCOME [aw=weight],nq(6)
bysort temp: egen income_broad=mean(INCOME)
drop temp
gen hwage_broad=ln(income_broad/V305) 
* age was left at 40 above; restore the true age before re-estimating.
replace age=1973-V77
replace age2=age^2
* NOTE(review): this first stage is unweighted, unlike the hwage first stage
* above which uses [aw=weight] - confirm whether that is intended.
xi: reg hwage_broad age age2 i.OCCUPP
replace age=40 
replace age2=1600
predict hwage_dad_broad if e(sample)
for any hwage_dad_broad: gen t_X=X \ replace X=.
levelsof OCCUPP, local(occ)
foreach o of local occ {
	* Same father-occupation lookup as above, for the broad-banded wage.
	gen temp1=t_hwage_dad_broad if OCCUPP==`o'
	egen temp2=max(temp1)
	replace hwage_dad_broad=temp2 if FATHOCC==`o'
	drop temp1 temp2
	}
drop t_*
* Restore the true age for the second-stage regressions.
replace age=1973-V77
replace age2=age^2
* Coarsen the father's broad-banded wage into 80 quantile groups, replacing each
* value by its group mean (this xtile is unweighted).
xtile temp1=hwage_dad_broad,nq(80)
bysort temp1: egen temp2=mean(hwage_dad_broad)
replace hwage_dad_broad=temp2
drop temp*

* Elasticity
reg hwage hwage_dad age age2 [aw=weight],r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 1973")
* Elasticity for Australian-born fathers and sons
* Restricts to V21 and V78 both in 1-9.
reg hwage hwage_dad age age2 [aw=weight] if V21>=1 & V21<=9 & V78>=1 & V78<=9,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 1973- Native")

* Broadbanded IGE estimates
reg hwage_broad hwage_dad_broad age age2 [aw=weight],r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("Broad - IGE 1973")
reg hwage_broad hwage_dad_broad age age2 [aw=weight] if V21>=1 & V21<=9 & V78>=1 & V78<=9,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("Broad - IGE 1973- Native")

* Summary stats
* The main regression is re-run so that e(sample) below refers to it.
reg hwage hwage_dad age age2 [aw=weight],r
* Convert logs back to levels and divide by .149.
* NOTE(review): presumably a price deflator - confirm.
for any hwage hwage_dad: replace X=exp(X)/.149
log using sumstats.smcl, append
di "1973"
sum hwage hwage_dad [aw=weight] if e(sample)
log close

* Creating a dataset to merge with HILDA data
* Builds an occupation-level lookup (father_wages_1973.dta) holding the
* predicted log hourly wage and log earnings of a 40-year-old in each 1971
* occupation code, re-keyed to 2004 occupation codes through the crosswalk file.
*
* Each first-stage regression must be fitted on the TRUE age. Predictions are
* obtained by temporarily setting age to 40, so age and age2 are restored before
* every regression. Without that, the earnings regression would be fitted with
* age fixed at 40 for every row, the age terms would be dropped as collinear,
* and earnings_dad1973 would be an unadjusted occupation mean.
replace age=1973-V77
replace age2=age^2
* hwage was converted to levels for the summary statistics; rebuild the log wage.
replace hwage=ln(INCOME/V305) 
xi: reg hwage age age2 i.OCCUPP [aw=weight]
replace age=40 
replace age2=1600
predict hwage_dad1973 if e(sample)

* Restore the true age before fitting the earnings first stage.
replace age=1973-V77
replace age2=age^2
gen earnings=ln(INCOME) 
xi: reg earnings age age2 i.OCCUPP [aw=weight]
replace age=40 
replace age2=1600
predict earnings_dad1973 if e(sample)

* One row per occupation. The predictions are constant within an occupation
* (age is fixed at 40), so the weighted mean simply returns that value.
collapse earnings_dad1973 hwage_dad1973 [aw=weight], by(OCCUPP)
drop if hwage_dad1973==.
ren OCCUPP occcode1971
sort occcode1971
* Legacy (pre-Stata 11) match-merge syntax: the using file must already be
* sorted by occcode1971. nokeep discards crosswalk rows with no match here.
merge occcode1971 using occ_xwalk_1971_2004, nokeep
tab _merge
keep if _merge==3
drop _merge occname2004
* Rename to the HILDA father-occupation variable so the lookup can be merged on it.
ren occcode2004 dfmfocc
sort dfmfocc
save father_wages_1973, replace

***********************************************
* 1987 NSSS Inequality
* This dataset is unweighted
*********************************************** 
* 1987 survey (file d0627.dta): unweighted father-son wage elasticities, plus a
* broad-banded variant in which own income is coarsened to six bands.
cd  <DIRECTORY>
use d0627.dta, clear
ren faocc focc
* Occupation codes of 9994 and above, zero job income and job income of
* 9999997 and above are treated as missing.
for any occ focc: recode X 9994/max=. 
for any incjob: recode X 0=. 9999997/max=.
gen inc=.
* The paired lists give X=3, Y=1: inc is set to incjob*1 where incper==3.
* Rows with any other incper value keep inc missing.
for num 3 \ num 1: replace inc=incjob*Y if incper==X
* bornyear of 90 or more is treated as missing; age is 87 minus bornyear
* (so bornyear is used as a two-digit year).
recode bornyear 90/max=.
gen age=87-bornyear
* Keep men aged 25-54 who are working
keep if age>=25 & age<=54 & sex==1 & (worknow==1 | worknow==2)
* Log hourly wage: divisor 48*40 when worknow==1 and 48*20 when worknow==2
* (presumably full-time and part-time hours assumptions - TODO confirm).
gen hwage=ln(inc/(48*40)) if worknow==1
replace hwage=ln(inc/(48*20)) if worknow==2
gen age2=age^2
* First stage: wage on age, age squared and occupation dummies.
xi: reg hwage age age2 i.occ
* Fix age at 40 so predict gives the occupation-specific wage at age 40.
replace age=40 
replace age2=1600
predict hwage_dad if e(sample)
* Stash predictions in t_hwage_dad; hwage_dad is refilled by FATHER's occupation.
for any hwage_dad: gen t_X=X \ replace X=.
levelsof occ, local(occ)
foreach o of local occ {
	* Age-40 predicted wage for the current occupation, spread to all rows and
	* assigned to respondents whose father held that occupation.
	gen temp1=t_hwage_dad if occ==`o'
	egen temp2=max(temp1)
	replace hwage_dad=temp2 if focc==`o'
	drop temp1 temp2
	}
drop t_*

* Creating 'broadbanded' own income and father's income (for robustness check)
* Note that we don't need to worry about collapsing occ categories here, as there are 78 occupations.
* Own income is replaced by the mean income of its sextile.
xtile temp=inc,nq(6)
bysort temp: egen income_broad=mean(inc)
drop temp
gen hwage_broad=ln(income_broad/(48*40)) if worknow==1
replace hwage_broad=ln(income_broad/(48*20)) if worknow==2
* age was left at 40 above; restore the true age before re-estimating.
replace age=87-bornyear
replace age2=age^2
xi: reg hwage_broad age age2 i.occ
replace age=40 
replace age2=1600
predict hwage_dad_broad if e(sample)
for any hwage_dad_broad: gen t_X=X \ replace X=.
levelsof occ, local(occ)
foreach o of local occ {
	* Same father-occupation lookup as above, for the broad-banded wage.
	gen temp1=t_hwage_dad_broad if occ==`o'
	egen temp2=max(temp1)
	replace hwage_dad_broad=temp2 if focc==`o'
	drop temp1 temp2
	}
drop t_*

* Restore the true age for the second-stage regressions.
replace age=87-bornyear
replace age2=age^2

* Main 1987 IGE
reg hwage hwage_dad age age2,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 1987")
* IGE for Australian-born fathers and sons
* Restricts to wherebrn and fawhere both in 1-6.
reg hwage hwage_dad age age2 if wherebrn>=1 & wherebrn<=6 & fawhere>=1 & fawhere<=6,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 1987-Native")

* Broadbanded IGE
reg hwage_broad hwage_dad_broad age age2,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("Broad - IGE 1987")
reg hwage_broad hwage_dad_broad age age2 if wherebrn>=1 & wherebrn<=6 & fawhere>=1 & fawhere<=6,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("Broad - IGE 1987- Native")

* Summary stats
* The main regression is re-run so that e(sample) below refers to it.
reg hwage hwage_dad age age2 ,r
* Convert logs back to levels and divide by .575.
* NOTE(review): presumably a price deflator - confirm.
for any hwage hwage_dad: replace X=exp(X)/.575
log using sumstats.smcl, append
di "1987"
sum hwage hwage_dad if e(sample)
log close

***********************************************
* HILDA Intergenerational earnings elasticity
* Needs to be run on secure data computer
*********************************************** 
* HILDA: load the d-prefixed wave, attach earnings and hours from waves a, b
* and c, restrict the sample, tabulate unemployment by father's unemployment,
* and impute the father's age-40 wage/earnings from the father's occupation.
cd <DIRECTORY>
clear
set mem 10m
set more off

* The for command substitutes d for every X, so the d-prefixed variables are
* read from the combined wave-d file.
for any d: use xwaveid Xwsfei Xhhfxid Xcaeft Xfmfuemp Xcapune Xanbcob Xfmfcob Xjbmocc Xwscmg Xjbmhruc Xhhwtrp Xlnwte Xjbmhruc Xjbmocc Xfmfocc Xhgsex Xhgage using "Combined d41u.dta", clear
* For each of waves a, b and c: legacy match-merge on xwaveid, bringing in only
* the listed variables. nokeep discards using-only rows. The using files must
* already be sorted by xwaveid.
for any a b c: sort xwaveid \ merge xwaveid using "Combined X41u.dta", keep(Xwsfei Xwscmg Xjbmhruc Xhhfxid Xcaeft) nokeep \ drop _merge
* Keep dhgsex==1 aged 25-54 (presumably men, matching the other sections - confirm coding).
keep if dhgsex==1 & dhgage>=25 & dhgage<=54
* Recoding missing wages and occupation codes as "."
for any dfmfocc djbmocc dwscmg djbmhruc: recode X -10/-1=.
* Recoding minimum weights to zero
for any dhhwtrp dlnwte: recode X min/0=0

* Unemployment correlations
recode dfmfuemp -10/-1=.
* NOTE(review): only -10 is recoded here, whereas the other variables recode the
* whole range -10 to -1 - confirm that dcapune has no other negative codes.
recode dcapune -10=.
tab dfmfuemp [aw=dhhwtrp]
* Creates dcapune0 and dcapune100: 1 when dcapune equals 0 (respectively 100),
* missing when dcapune is missing, 0 otherwise.
for num 0 100: gen dcapuneX=dcapune \ recode dcapuneX X=1 .=. *=0
bysort dfmfuemp: sum dcapune* [aw=dhhwtrp]

* hwage: log of dwscmg divided by djbmhruc (weekly earnings over hours, going by
* the note on Xwscmg below and presumably weekly hours - TODO confirm).
gen hwage=ln(dwscmg/(djbmhruc))
* hwage4: log of the mean of the four waves' hourly wages, defined only when
* earnings and hours are positive in all four waves.
gen hwage4=ln(((awscmg/(ajbmhruc))+(bwscmg/(bjbmhruc))+(cwscmg/(cjbmhruc))+(dwscmg/(djbmhruc)))/4) if ajbmhruc>0 & bjbmhruc>0 & cjbmhruc>0 & djbmhruc>0 & awscmg>0 & bwscmg>0 & cwscmg>0 & dwscmg>0
*Xwscmg is weekly earnings; Xwsfei is annual earnings
gen earnings=ln(dwsfei)
* earnings4: log of mean annual earnings over the four waves, all required positive.
gen earnings4=ln((awsfei+bwsfei+cwsfei+dwsfei)/4) if awsfei>0 & bwsfei>0 & cwsfei>0 & dwsfei>0
gen age=dhgage
gen age2=dhgage^2
* Four first-stage regressions (earnings, hwage, hwage4, earnings4), all
* unweighted. Each one is fitted on the true age; age is then set to 40 for the
* prediction and restored before the next regression.
xi: reg earnings age age2 i.djbmocc 
replace age=40 
replace age2=1600
predict earnings_dad if e(sample)
replace age=dhgage
replace age2=dhgage^2
xi: reg hwage age age2 i.djbmocc
replace age=40 
replace age2=1600
predict hwage_dad if e(sample)
replace age=dhgage
replace age2=dhgage^2
xi: reg hwage4 age age2 i.djbmocc 
replace age=40 
replace age2=1600
predict hwage_dad4 if e(sample)
replace age=dhgage
replace age2=dhgage^2
xi: reg earnings4 age age2 i.djbmocc 
replace age=40 
replace age2=1600
predict earnings_dad4 if e(sample)

* Stash each prediction in a t_ copy and blank the original, which is refilled
* below according to the FATHER's occupation (dfmfocc).
for any earnings_dad earnings_dad4 hwage_dad hwage_dad4: gen t_X=X \ replace X=.
levelsof djbmocc, local(occ)
foreach o of local occ {
	* For each measure: take the age-40 prediction for the current occupation,
	* spread it to all rows, and assign it where the father held that occupation.
	gen temp1=t_earnings_dad if djbmocc==`o'
	egen temp2=max(temp1)
	replace earnings_dad=temp2 if dfmfocc==`o'

	gen temp3=t_hwage_dad if djbmocc==`o'
	egen temp4=max(temp3)
	replace hwage_dad=temp4 if dfmfocc==`o'

	gen temp5=t_hwage_dad4 if djbmocc==`o'
	egen temp6=max(temp5)
	replace hwage_dad4=temp6 if dfmfocc==`o'

	gen temp7=t_earnings_dad4 if djbmocc==`o'
	egen temp8=max(temp7)
	replace earnings_dad4=temp8 if dfmfocc==`o'

	drop temp1 temp2 temp3 temp4 temp5 temp6 temp7 temp8
	}
drop t_*

* Creating 'broadbanded' own income and father's income (for robustness check)
* Own weekly earnings are replaced by the mean of their sextile (unweighted)
* before the hourly wage is computed.
xtile temp=dwscmg,nq(6)
bysort temp: egen income_broad=mean(dwscmg)
drop temp
gen hwage_broad=ln(income_broad/djbmhruc) 
* Make sure the first stage is fitted on the true age.
replace age=dhgage
replace age2=dhgage^2
xi: reg hwage_broad age age2 i.djbmocc
* Fix age at 40 so predict gives the occupation-specific wage at age 40.
* age stays at 40 when this block ends.
replace age=40 
replace age2=1600
predict hwage_dad_broad if e(sample)
* Stash predictions in t_hwage_dad_broad; the original is refilled by FATHER's occupation.
for any hwage_dad_broad: gen t_X=X \ replace X=.
levelsof djbmocc, local(occ)
foreach o of local occ {
	* Age-40 prediction for the current occupation, spread to all rows and
	* assigned to respondents whose father held that occupation.
	gen temp1=t_hwage_dad_broad if djbmocc==`o'
	egen temp2=max(temp1)
	replace hwage_dad_broad=temp2 if dfmfocc==`o'
	drop temp1 temp2
	}
drop t_*
* Coarsen the father's broad-banded wage into 80 quantile groups, replacing each
* value by its group mean.
xtile temp1=hwage_dad_broad,nq(80)
bysort temp1: egen temp2=mean(hwage_dad_broad)
replace hwage_dad_broad=temp2
drop temp*

* Label each imputed variable with its own name (used as row labels in output tables).
for any earnings_dad earnings_dad4 hwage_dad hwage_dad4 hwage_dad_broad: la var X "X"

* IGE
* Restore the true age (it may still be fixed at 40 from the imputation steps).
replace age=dhgage
replace age2=dhgage^2
reg hwage hwage_dad age age2 [aw=dhhwtrp] ,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004")
* Flag the estimation sample of the main regression; the robustness checks
* further down are restricted to it.
gen mainsample=1 if e(sample)

* IGE for Australian-born fathers and sons
* Restricts to danbcob==1 and dfmfcob==1101.
reg hwage hwage_dad age age2 [aw=dhhwtrp] if danbcob==1 & dfmfcob==1101 ,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native")
* Flag the native-born estimation sample for reuse in the robustness checks.
gen nativesample=1 if e(sample)

* Broadbanded IGEs
reg hwage_broad hwage_dad_broad age age2 [aw=dhhwtrp],r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("Broad - IGE 2004")
reg hwage_broad hwage_dad_broad age age2 [aw=dhhwtrp] if danbcob==1 & dfmfcob==1101,r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("Broad - IGE 2004- Native")

* Quintiles
* Sons are ranked on the residual from an (unweighted) regression of wage on
* age and age squared; fathers on the imputed wage. e(sample) in the xtile
* line refers to this age regression.
reg hwage age age2
predict hwage_res if e(sample), resid
* replace starts a fresh quintiles.smcl.
log using quintiles.smcl, replace
for any hwage_res hwage_dad: xtile X5=X [aw=dhhwtrp] if e(sample),nq(5)
* Weighted cross-tabulation of son quintile by father quintile, column percentages only.
tab hwage_res5 hwage_dad5 [aw=dhhwtrp], nofreq col
log close

* Using 4-year average wages for *fathers* 
* replace starts a fresh results_robust1.doc; later robustness tables append to it.
reg hwage hwage_dad4 age age2 [aw=dhhwtrp],r
outreg using results_robust1.doc, se nocons bracket noaster replace bdec(3) ct("IGE 2004-4yr")
reg hwage hwage_dad4 age age2 [aw=dhhwtrp] if danbcob==1 & dfmfcob==1101,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-4yr-Native")

* Robustness Check: Using 1973 father wages
* Legacy match-merge on father's occupation code against the lookup saved as
* father_wages_1973. nokeep discards lookup rows with no match in memory.
* NOTE(review): with this legacy syntax, if the lookup holds more than one row
* per dfmfocc, rows are paired in order within the group rather than rejected -
* confirm the lookup is unique on dfmfocc.
sort dfmfocc 
merge dfmfocc using father_wages_1973, nokeep
tab _merge
for any hwage_dad1973 earnings_dad1973: la var X "X"

* Robustness Check: Two sample IV
* Father's wage is taken from the 1973 lookup instead of the within-sample
* imputation; samples are held fixed via mainsample / nativesample.
reg hwage hwage_dad1973 age age2 [aw=dhhwtrp] if mainsample==1,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-2sample")
* codebook reports how many distinct occupation codes contribute to the estimate.
codebook occcode1971 dfmfocc if e(sample)
sum hwage hwage_dad1973 [aw=dhhwtrp] if e(sample)
reg hwage hwage_dad1973 age age2 [aw=dhhwtrp] if nativesample==1 ,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native-2sample")
reg hwage4 hwage_dad1973 age age2 [aw=dhhwtrp] if mainsample==1,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-2sample-4yr")
codebook occcode1971 dfmfocc if e(sample)
reg hwage4 hwage_dad1973 age age2 [aw=dhhwtrp] if nativesample==1,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native-2sample-4yr")

* Robustness Check: Using annual earnings
reg earnings earnings_dad age age2 [aw=dhhwtrp] if mainsample==1,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004")
sum earnings earnings_dad [aw=dhhwtrp] if e(sample)
reg earnings earnings_dad age age2 [aw=dhhwtrp] if nativesample==1,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native")
* The 4-year specification is weighted by dlnwte rather than dhhwtrp.
reg earnings4 earnings_dad4 age age2 [aw=dlnwte],r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-4yr")

* Robustness check - restricting to 30-49 year olds
* NOTE(review): the column titles below say "30-50yo" but the condition is age 30 to 49.
reg hwage hwage_dad age age2 [aw=dhhwtrp] if age>=30 & age<=49,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004, 30-50yo")
reg hwage hwage_dad age age2 [aw=dhhwtrp] if danbcob==1 & dfmfcob==1101 & age>=30 & age<=49,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native, 30-50yo")

* Decomposition of hours and hourly wage dispersion into within-occ and between-occ
* annual_hours is annual earnings divided by the hourly wage, capped at 3120.
gen annual_hours=exp(earnings)/exp(hwage)
recode annual_hours .=. 3120/max=3120
gen wage_dollars=exp(hwage)
gen annual_dollars=exp(earnings)
* occ1-occ3 keep the occupation code only where at least two main-sample
* observations of the corresponding measure exist in that occupation.
bysort djbmocc: egen temp1=count(wage_dollars) if mainsample==1 
gen occ1=djbmocc if temp1>=2 & temp1~=.
bysort djbmocc: egen temp2=count(annual_hours) if mainsample==1 
gen occ2=djbmocc if temp2>=2 & temp2~=.
bysort djbmocc: egen temp3=count(annual_dollars) if mainsample==1 
gen occ3=djbmocc if temp3>=2 & temp3~=.
drop temp1 temp2 temp3
* For each measure Y paired with occX: residualise on age and age squared,
* decompose inequality of the residual by occupation, and display the ratio
* r(within_ge1)/r(ge1). ineqdeco is not an official Stata command and must be
* installed separately.
for num 1/3 \ any wage_dollars annual_hours annual_dollars : qui reg Y age age2 \ qui predict Y_r if e(sample), resid \ qui ineqdeco Y_r [aw=dhhwtrp] if mainsample==1 & occX~=.,by(occX) \ drop Y_r \ di "Y" \ di r(within_ge1)/r(ge1)

* Various combinations of robustness checks (results not shown)
* These rely on earnings_dad1973 from the 1973 lookup and are not restricted
* to mainsample / nativesample.
reg earnings4 earnings_dad4 age age2 [aw=dlnwte] if danbcob==1 & dfmfcob==1101,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-4yr-Native")
reg earnings earnings_dad1973 age age2 [aw=dhhwtrp] ,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-2sample")
codebook occcode1971 dfmfocc if e(sample)
reg earnings earnings_dad1973 age age2 [aw=dhhwtrp] if danbcob==1 & dfmfcob==1101 ,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native-2sample")
reg earnings4 earnings_dad1973 age age2 [aw=dhhwtrp] ,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-2sample-4yr")
codebook occcode1971 dfmfocc if e(sample)
reg earnings4 earnings_dad1973 age age2 [aw=dhhwtrp] if danbcob==1 & dfmfcob==1101 ,r
outreg using results_robust1.doc, se nocons bracket noaster append bdec(3) ct("IGE 2004-Native-2sample-4yr")

* Summary stats
* Each regression is re-run only to set e(sample) for the summary that follows.
* The replace lines convert the log variables to levels IN PLACE, so hwage,
* hwage_dad, hwage4 and hwage_dad4 are no longer logs after this block.
reg hwage hwage_dad age age2 [aw=dhhwtrp],r
for any hwage hwage_dad: replace X=exp(X)
log using sumstats.smcl, append
di "2004 - 1 year"
sum hwage hwage_dad [aw=dhhwtrp] if e(sample)
log close
reg hwage4 hwage_dad4 age age2 [aw=dlnwte],r
for any hwage4 hwage_dad4: replace X=exp(X)
log using sumstats.smcl, append
di "2004 - 4 year"
* NOTE(review): this summary is unweighted although the regression above uses
* [aw=dlnwte] and the 1-year summary is weighted - confirm whether intended.
sum hwage4 hwage_dad4 if e(sample)
log close

*********************************************** 
* US PSID
*********************************************** 
* US PSID: read the father-occupation extract, label its variables, build the
* person identifier and merge with the pequiv_01 file.
clear
set mem 50m
set more off
cd <DIRECTORY>

insheet using dadocc.csv, clear

* Convert any columns read as strings to numeric where possible.
destring, replace 
label variable er30001  "1968 INTERVIEW NUMBER"  
label variable er30002  "PERSON NUMBER                         68"  
label variable er33601  "2001 INTERVIEW NUMBER"  
label variable er33602  "SEQUENCE NUMBER                       01"  
label variable er33603  "RELATION TO HEAD                      01"  
label variable er17226  "B9-9A MAIN OCCUPATION: 3 DIGIT (HD-E)"  
label variable er19959  "L10-10A OCCUPATION OF FATHER"  
label variable er19968  "L20-20A OCCUPATION OF MOTHER"  
label variable q23j6  "OCCUPATION 02"  
label variable er33701  "2003 INTERVIEW NUMBER"  
label variable er33702  "SEQUENCE NUMBER                       03"  
label variable er33703  "RELATION TO HEAD                      03"  
label variable er23304  "K2/67 ST FATHER BORN-WF"  
label variable er23311  "K10-10A OCCUPATION OF FATHER"  
label variable er23396  "L10-10A OCCUPATION OF FATHER"  
label variable er23405  "L20-20A OCCUPATION OF MOTHER"  

* Person identifier: 1968 interview number times 1000 plus person number.
* NOTE(review): stored in the default numeric type; exact only while the value
* stays within that type's integer precision - confirm against the id range.
gen x11101ll=(er30001*1000)+er30002
sort x11101ll

* Legacy match-merge on the person identifier; pequiv_01 must already be sorted
* by x11101ll. Unmatched rows from both sides are kept (no nokeep option).
merge x11101ll using pequiv_01
tab _merge
drop _merge
cd  <DIRECTORY>

*********************************************** 
* Drop SEO sample (poverty/low-income oversample)
* Identified by 1968 interview numbers 5001-6872.
drop if er30001>=5001 & er30001<=6872 
*********************************************** 

* Checking the merge
tab  er33603 d1110501

* Keep d11102ll==1 (presumably men - confirm coding), aged 25-54 on d1110101,
* with positive e1110101.
keep if d11102ll==1 & d1110101>=25 & d1110101<=54 & e1110101>0
* Recoding missing wages and occupation codes as "."
for any er17226 er19959: recode X 0=. 998/999=.
for any i1111001: recode X 0=.
* Recoding minimum weights to zero
for any w1110101: recode X min/0=0
* hwage is the log of i1111001 divided by e1110101 (presumably annual labour
* earnings over annual hours - TODO confirm); earnings is the log of i1111001.
gen hwage=ln(i1111001/e1110101)
gen earnings=ln(i1111001)
gen age=d1110101
gen age2=d1110101^2

* First stage for the hourly wage.
* NOTE(review): e1110101 enters as a regressor and is NOT fixed at prediction
* time, so the predicted value varies within an occupation and the lookup loop
* below assigns the within-occupation maximum - confirm this is intended.
xi: reg hwage age age2 e1110101 i.er17226 [aw=w1110101]
* Fix age at 40 so predict is evaluated at age 40.
replace age=40 
replace age2=1600
predict hwage_dad if e(sample)
* Restore the true age before the earnings first stage.
replace age=d1110101
replace age2=d1110101^2
xi: reg earnings age age2 i.er17226 [aw=w1110101]
replace age=40 
replace age2=1600
predict earnings_dad if e(sample)

* Stash predictions in t_ copies; the originals are refilled by FATHER's
* occupation (er19959).
for any earnings_dad hwage_dad: gen t_X=X \ replace X=.
levelsof er17226, local(occ)
foreach o of local occ {
	* Largest prediction among respondents in the current occupation, assigned
	* to respondents whose father held that occupation.
	gen temp1=t_hwage_dad if er17226==`o'
	egen temp2=max(temp1)
	replace hwage_dad=temp2 if er19959==`o'
	drop temp1 temp2
	gen temp1=t_earnings_dad if er17226==`o'
	egen temp2=max(temp1)
	replace earnings_dad=temp2 if er19959==`o'
	drop temp1 temp2
	}
drop t_*
* Restore the true age for the second-stage regressions.
replace age=d1110101
replace age2=d1110101^2

reg hwage hwage_dad age age2 [aw=w1110101], r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE US 2001")
codebook er19959 if e(sample)
* Standardise both wages within the estimation sample; the regression on the
* standardised variables is reported under the IGC column title.
for any hwage hwage_dad: egen Xz=std(X) if e(sample)
reg hwagez hwage_dadz age age2 [aw=w1110101],r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGC US 2001")
drop hwagez hwage_dadz

* Correlation for Native-born fathers and sons
* Restricts to er23304 in 1-56 (presumably US state codes for the father's
* state of birth - confirm).
reg hwage hwage_dad age age2 if er23304>=1 & er23304<=56 [aw=w1110101],r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGE US 2001-Native")
* The standardised variables are defined only on the native-born estimation
* sample, which restricts the next regression to that sample.
for any hwage hwage_dad: egen Xz=std(X) if e(sample)
reg hwagez hwage_dadz age age2 [aw=w1110101],r
outreg using results.doc, se nocons bracket noaster append bdec(3) ct("IGC US 2001-Native")
drop hwagez hwage_dadz

* Quintiles
* Sons are ranked on the residual from an (unweighted) regression of wage on
* age and age squared; fathers on the imputed wage.
reg hwage age age2
predict hwage_res if e(sample), resid
for any hwage_res hwage_dad: xtile X5=X [aw=w1110101] if e(sample),nq(5)
log using quintiles.smcl, append
di "US PSID"
* NOTE(review): the quintile cut-offs are weighted but this tabulation is not -
* confirm whether weights were meant to be applied here too.
tab hwage_res5 hwage_dad5, nofreq col
log close

* Summary stats
* The main regression is re-run so that e(sample) below refers to it; the
* replace line converts the log variables to levels in place.
reg hwage hwage_dad age age2 [aw=w1110101], r
for any hwage hwage_dad: replace X=exp(X)
log using sumstats.smcl, append
di "US PSID"
sum hwage hwage_dad [aw=w1110101] if e(sample)
log close


* Program to calculate p-values from a 2-sample t-test
* Drop only this program if it is already defined. Other programs the user has
* in memory are left alone.
capture program drop pvalues
program define pvalues
	* Two-sample test of the difference between two estimates.
	*
	* Inputs (global macros, set by the caller before each call):
	*   n1, mean1, semean1 - sample size, estimate and standard error, sample 1
	*   n2, mean2, semean2 - sample size, estimate and standard error, sample 2
	*
	* Effect: CLEARS the data in memory and leaves a one-row dataset holding
	*   diff - mean1 minus mean2
	*   t    - diff divided by the standard error of the difference
	*   p    - two-tailed p-value from the standard normal distribution
	* and then summarizes those three variables.
	*
	* Intermediate output is suppressed. Output is always switched back on
	* before the program ends, including when a calculation fails (for example
	* because a global was not set). Otherwise one failure would silence the
	* rest of the session.
	set output error
	capture noisily {
		clear
		set obs 1
		/*** create t-statistic for difference between 2 means ***/
		* Stored as double so no precision is lost to the default float type.
		gen double diff=$mean1-$mean2
		* Each term (se*sqrt(n))^2/n reduces algebraically to se^2, so the
		* denominator is sqrt(se1^2 + se2^2).
		gen double t = ($mean1-$mean2)/sqrt((($semean1*sqrt($n1))^2/$n1)+(($semean2*sqrt($n2))^2/$n2))
		/*** create 2-tailed probability for t-statistic ***/
		gen double p = 2*(1-normal(abs(t)))
	}
	local rc = _rc
	set output proc
	* Pass any failure on to the caller now that output is visible again.
	if `rc' {
		exit `rc'
	}
	sum diff t p 
end

* Test of difference - Australia 1965-2004 (Table 2)
* Each for line pairs the six numbers with the six names in order and stores
* them as globals (n1, mean1, semean1, n2, mean2, semean2). The numbers are
* hard-coded, presumably copied from the regression output - they must be
* updated by hand if the estimates change. set output error hides the
* command echo produced by for; pvalues turns output back on before printing.
for num 946 .257 .053 2115 .181 .043 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 946 .159 .033 2115 .169 .025 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 644 .247 .062 1320 .162 .060 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 644 .152 .038 1320 .172 .033 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues

* Test of difference - Australia Broadbanded 1965-2004 (Table 3)
for num 946 .257 .053 2115 .211 .041 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 946 .159 .033 2115 .151 .029 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 644 .247 .062 1320 .203 .055 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 644 .152 .038 1320 .151 .041 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues


* Test of difference - US versus Australia (Table 5)
for num 2115 .181 .043 356 .325 .075 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 2115 .169 .025 356 .264 .061 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 1320 .162 .060 211 .229 .096 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues
for num 1320 .172 .033 211 .194 .082 \ any n1 mean1 semean1 n2 mean2 semean2: set output error \ global Y X
pvalues